Problem 1

# Rcpp library
library(Rcpp)

# Defining GCD and LCM functions
cppFunction('
int gcd(int a, int b) {
    if (b == 0) return abs(a);
    return gcd(b, a % b);
}
')

cppFunction('
int lcm(int a, int b) {
    return abs(a*b) / std::gcd(a,b);
}
')
# Defining constructor
setClass("Rational",
         slots=c(numerator="numeric", denominator="numeric"))

# Defining validator
setValidity("Rational", function(object){
  if(object@denominator==0){
    return("Denominator is non-zero")
  }
  TRUE
  })
## Class "Rational" [in ".GlobalEnv"]
## 
## Slots:
##                               
## Name:    numerator denominator
## Class:     numeric     numeric
##' @title Create a `rational` object
##' @param numerator and denominator
##' @return rational
##' @export
create_rational<-function(numerator, denominator) {
  invisible(new("Rational", numerator=numerator, denominator=denominator))
}

# Defining a show method
setMethod("show", "Rational",
  function(object) {
    cat(paste(object@numerator,"/",object@denominator,"\n"))
  }
)

# Defining a simplify method
simplify <- function(x) stop("This function needs specfic definition")

setMethod("simplify", "Rational", function(x) {
  if (x@numerator==0) {
    print(0)
  } else {
    gcd_num <- gcd(x@numerator, x@denominator)
    x@numerator= x@numerator/gcd_num
    x@denominator=x@denominator/gcd_num
    print(x)
  }
})
## Creating a generic function from function 'simplify' in the global environment
# Defining quotient method
quotient <- function(x,...) stop("This function needs specfic definition")

setMethod("quotient", "Rational", function(x, digits = NULL) {
  val <- x@numerator / x@denominator
  if (!is.null(digits) && is.numeric(digits)) {
    format_val <- format(val,digits=digits)
    print(format_val)
  } else {
    print(val)
  }
})
## Creating a generic function from function 'quotient' in the global environment
# Defining Addition
setMethod("+", signature(e1="Rational", 
                         e2="Rational"), 
          function(e1, e2) {
          common_denom=lcm(e1@denominator, e2@denominator)
          numerator2=(e1@numerator*(common_denom/e1@denominator))+
                   (e2@numerator*(common_denom/e2@denominator))
          simplify(create_rational(numerator2, common_denom))
})

# Defining Subtraction
setMethod("-", signature(e1="Rational", 
                         e2="Rational"), 
          function(e1, e2) {
          common_denom=lcm(e1@denominator, e2@denominator)
          numerator2=(e1@numerator*(common_denom/e1@denominator))-
                   (e2@numerator*(common_denom/e2@denominator))
          simplify(create_rational(numerator2, common_denom))
})

# Defining Multiplication
setMethod("*", signature(e1="Rational", 
                         e2="Rational"), 
          function(e1, e2) {
          numerator2=e1@numerator*e2@numerator
          denominator2=e1@denominator*e2@denominator
          simplify(create_rational(numerator2,denominator2))
})

# Defining Division
setMethod("/", signature(e1="Rational", 
                         e2="Rational"), 
          function(e1, e2) {
          numerator2=e1@numerator*e2@denominator
          denominator2=e1@denominator*e2@numerator 
          simplify(create_rational(numerator2,denominator2))
})
# Creating Rational objects
r1 <- create_rational(24,6)
r2 <- create_rational(7,230)
r3 <- create_rational(0,4)
# Evaluating the following code
r1
## 24 / 6
r3
## 0 / 4
r1 + r2
## 927 / 230
## 927 / 230
r1 - r2
## 913 / 230
## 913 / 230
r1 * r2
## 14 / 115
## 14 / 115
r1 / r2
## 920 / 7
## 920 / 7
r1 + r3
## 4 / 1
## 4 / 1
r1 * r3
## [1] 0
## [1] 0
r2 / r3
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'simplify': invalid class "Rational" object: Denominator is non-zero
quotient(r1)
## [1] 4
quotient(r2)
## [1] 0.03043478
quotient(r2, digits = 3)
## [1] "0.0304"
quotient(r2, digits = 3.14)
## [1] "0.0304"
quotient(r2, digits = "avocado")
## [1] 0.03043478
q2 <- quotient(r2, digits = 3)
## [1] "0.0304"
q2
## [1] "0.0304"
quotient(r3)
## [1] 0
simplify(r1)
## 4 / 1
simplify(r2)
## 7 / 230
simplify(r3)
## [1] 0

Problem 2

# Loading packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Loading the dataset
p2=read_csv("df_for_ml_improved_new_market.csv")
## Rows: 4347 Columns: 112
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (1): eventdate
## dbl (111): id, case_id, year, height, width, size_inchsqr, price_usd, meanpr...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

A

# Renaming genre related columns
p2=p2 %>% rename(Photography=Genre___Photography,Painting=Genre___Painting,
                 Sculpture=Genre___Sculpture,Print=Genre___Print,Others=Genre___Others)

# Making a long dataset that indicates genre for each painting
p2b=p2 %>% pivot_longer(cols=c(Photography,Painting,Sculpture,Print,Others),
                        names_to = "genre",values_to = "count") %>% filter(count %in% 1) 

# Generating percentage within each year for each genre
p2b2=p2b %>% group_by(year, genre) %>% summarise(count = n()) %>% ungroup() %>%
  group_by(year) %>% mutate(percent = count / sum(count) * 100)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Generating stacked bar plot using plotly
plot_ly(data=p2b2,x= ~year,y= ~percent,color= ~genre,type='bar') %>%
  layout(title = "Distribution of Genre of Sales Across Years",xaxis=list(title="Year"),
    yaxis=list(title="Percentage"),barmode="stack")

B

# Calculate average price per year for overall trend
overall_p=p2b %>% group_by(year) %>%
  summarize(average_price = mean(price_usd, na.rm=TRUE)) %>% mutate(Genre ="Overall")  

# Calculate average price per year and genre
genre_p=p2b %>% group_by(year,genre) %>%
  summarize(average_price =mean(price_usd, na.rm=TRUE)) %>%
  rename(Genre=genre) %>% ungroup()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Plotting overall trend using plotly
fig <- plot_ly() %>%
  add_trace(data =overall_p, x = ~year, y = ~average_price, type ='scatter', 
            mode='lines+markers', visible =TRUE)

# Add traces for each genre, initially hidden
for (genre in unique(genre_p$Genre)) {
  fig <- fig %>%
    add_trace(data = subset(genre_p, Genre==genre), x = ~year, y = ~average_price, type ='scatter', 
              mode ='lines+markers',visible =FALSE)
}

# Creating dropdown buttons
buttons <- list(
  list(method = "restyle", args =list("visible",c(TRUE,rep(FALSE,5))),label="Overall"))

# Adding the button to the plot
for (i in 1:5) {
  genre <- unique(genre_p$Genre)[i]
  visibility <- rep(FALSE, length(unique(genre_p$Genre)) + 1)  
  visibility[i+1]=TRUE
  buttons <- append(buttons,list(list(method="restyle", args =list("visible", visibility),label=genre)))
}

# Adding layouts
fig %>% layout(title = "Average Sales Price Over Time",xaxis = list(title = "Year"),
    yaxis = list(title = "Average Sales Price (USD)"),
    updatemenus=list(list(type="dropdown",buttons=buttons,showactive=TRUE)),showlegend=FALSE)

Problem 3

# Loading packages
library(nycflights13)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
# Generate a table reporting the mean and median departure delay per airport
flights=data.table(flights)
p3a=merge(flights[, faa := origin],airports, by = "faa", all.x = TRUE)
p3a[,.(N =.N, mean_delay = mean(dep_delay, na.rm = TRUE), median_delay = median(dep_delay, na.rm = TRUE)),
       by = name] |> _[N >=10, !"N"] |> _[order(mean_delay, decreasing = TRUE)]
# Generating a table reporting the mean and median arrival delay per airport
flights=data.table(flights)
p3a2=merge(flights[, faa := dest],airports,by = "faa",
                all.x = TRUE)

p3a2[,.(name = ifelse(is.na(first(name)), first(faa), first(name)),
           N = .N,mean_delay = mean(arr_delay, na.rm = TRUE),
           median_delay = median(arr_delay, na.rm = TRUE)),by = faa][N >=10,!c("faa", "N")][order(-mean_delay)] |> print(x = _, nrows = 1e3)
##                                      name   mean_delay median_delay
##                                    <char>        <num>        <num>
##   1:                Columbia Metropolitan  41.76415094         28.0
##   2:                           Tulsa Intl  33.65986395         14.0
##   3:                    Will Rogers World  30.61904762         16.0
##   4:                 Jackson Hole Airport  28.09523810         15.0
##   5:                        Mc Ghee Tyson  24.06920415          2.0
##   6:               Dane Co Rgnl Truax Fld  20.19604317          1.0
##   7:                        Richmond Intl  20.11125320          1.0
##   8:        Akron Canton Regional Airport  19.69833729          3.0
##   9:                      Des Moines Intl  19.00573614          0.0
##  10:                   Gerald R Ford Intl  18.18956044          1.0
##  11:                      Birmingham Intl  16.87732342         -2.0
##  12:         Theodore Francis Green State  16.23463687          1.0
##  13: Greenville-Spartanburg International  15.93544304         -0.5
##  14:    Cincinnati Northern Kentucky Intl  15.36456376         -3.0
##  15:            Savannah Hilton Head Intl  15.12950601         -1.0
##  16:          Manchester Regional Airport  14.78755365         -3.0
##  17:                          Eppley Afld  14.69889841         -2.0
##  18:                               Yeager  14.67164179         -1.5
##  19:                     Kansas City Intl  14.51405836          0.0
##  20:                          Albany Intl  14.39712919         -4.0
##  21:                General Mitchell Intl  14.16722038          0.0
##  22:                       Piedmont Triad  14.11260054         -2.0
##  23:               Washington Dulles Intl  13.86420212         -3.0
##  24:               Cherry Capital Airport  12.96842105        -10.0
##  25:              James M Cox Dayton Intl  12.68048606         -3.0
##  26:     Louisville International Airport  12.66938406         -2.0
##  27:                  Chicago Midway Intl  12.36422360         -1.0
##  28:                      Sacramento Intl  12.10992908          4.0
##  29:                    Jacksonville Intl  11.84483416         -2.0
##  30:                       Nashville Intl  11.81245891         -2.0
##  31:                Portland Intl Jetport  11.66040210         -4.0
##  32:               Greater Rochester Intl  11.56064461         -5.0
##  33:      Hartsfield Jackson Atlanta Intl  11.30011285         -1.0
##  34:                Lambert St Louis Intl  11.07846451         -3.0
##  35:                         Norfolk Intl  10.94909344         -4.0
##  36:            Baltimore Washington Intl  10.72673385         -5.0
##  37:                         Memphis Intl  10.64531435         -2.5
##  38:                   Port Columbus Intl  10.60132291         -3.0
##  39:                  Charleston Afb Intl  10.59296847         -4.0
##  40:                    Philadelphia Intl  10.12719014         -3.0
##  41:                  Raleigh Durham Intl  10.05238095         -3.0
##  42:                    Indianapolis Intl   9.94043412         -3.0
##  43:            Charlottesville-Albemarle   9.50000000         -5.0
##  44:               Cleveland Hopkins Intl   9.18161129         -5.0
##  45:        Ronald Reagan Washington Natl   9.06695204         -2.0
##  46:                      Burlington Intl   8.95099602         -4.0
##  47:                 Buffalo Niagara Intl   8.94595186         -5.0
##  48:                Syracuse Hancock Intl   8.90392501         -5.0
##  49:                          Denver Intl   8.60650021         -2.0
##  50:                      Palm Beach Intl   8.56297210         -3.0
##  51:                                  BQN   8.24549550         -1.0
##  52:                             Bob Hope   8.17567568         -3.0
##  53:       Fort Lauderdale Hollywood Intl   8.08212154         -3.0
##  54:                          Bangor Intl   8.02793296         -9.0
##  55:           Asheville Regional Airport   8.00383142         -1.0
##  56:                                  PSE   7.87150838          0.0
##  57:                      Pittsburgh Intl   7.68099053         -5.0
##  58:                       Gallatin Field   7.60000000         -2.0
##  59:                 NW Arkansas Regional   7.46572581         -2.0
##  60:                           Tampa Intl   7.40852503         -4.0
##  61:               Charlotte Douglas Intl   7.36031885         -3.0
##  62:             Minneapolis St Paul Intl   7.27016886         -5.0
##  63:                      William P Hobby   7.17618819         -4.0
##  64:                         Bradley Intl   7.04854369        -10.0
##  65:                     San Antonio Intl   6.94537178         -9.0
##  66:                      South Bend Rgnl   6.50000000         -3.5
##  67:     Louis Armstrong New Orleans Intl   6.49017497         -6.0
##  68:                        Key West Intl   6.35294118          7.0
##  69:                        Eagle Co Rgnl   6.30434783         -4.0
##  70:                Austin Bergstrom Intl   6.01990875         -5.0
##  71:                   Chicago Ohare Intl   5.87661475         -8.0
##  72:                         Orlando Intl   5.45464309         -5.0
##  73:               Detroit Metro Wayne Co   5.42996346         -7.0
##  74:                        Portland Intl   5.14157973         -5.0
##  75:                        Nantucket Mem   4.85227273         -3.0
##  76:                      Wilmington Intl   4.63551402         -7.0
##  77:                    Myrtle Beach Intl   4.60344828        -13.0
##  78:    Albuquerque International Sunport   4.38188976         -5.5
##  79:         George Bush Intercontinental   4.24079040         -5.0
##  80:        Norman Y Mineta San Jose Intl   3.44817073         -7.0
##  81:               Southwest Florida Intl   3.23814963         -5.0
##  82:                       San Diego Intl   3.13916574         -5.0
##  83:              Sarasota Bradenton Intl   3.08243131         -5.0
##  84:            Metropolitan Oakland Intl   3.07766990         -9.0
##  85:   General Edward Lawrence Logan Intl   2.91439222         -9.0
##  86:                   San Francisco Intl   2.67289152         -8.0
##  87:                                  SJU   2.52052659         -6.0
##  88:                         Yampa Valley   2.14285714          2.0
##  89:              Phoenix Sky Harbor Intl   2.09704733         -6.0
##  90:            Montrose Regional Airport   1.78571429        -10.5
##  91:                     Los Angeles Intl   0.54711094         -7.0
##  92:               Dallas Fort Worth Intl   0.32212685         -9.0
##  93:                           Miami Intl   0.29905978         -9.0
##  94:                       Mc Carran Intl   0.25772849         -8.0
##  95:                  Salt Lake City Intl   0.17625459         -8.0
##  96:                           Long Beach  -0.06202723        -10.0
##  97:                Martha\\\\'s Vineyard  -0.28571429        -11.0
##  98:                  Seattle Tacoma Intl  -1.09909910        -11.0
##  99:                        Honolulu Intl  -1.36519258         -7.0
## 100:                                  STT  -3.83590734         -9.0
## 101:            John Wayne Arpt Orange Co  -7.86822660        -11.0
## 102:                    Palm Springs Intl -12.72222222        -13.5
##                                      name   mean_delay median_delay

B

# Table of aircraft model with the fastest average speed
planes=data.table(planes)
p3b =merge(flights,planes,by = "tailnum",all.x = TRUE)
p3b2=p3b[, `:=`(n =.N,
          average_mph = mean(distance/(air_time/60), na.rm = TRUE)),by = model]

p3b2[p3b2[,.I[which.max(average_mph)]],.(model, average_mph, n)]